from IPython.display import HTML
# Render a button that toggles the visibility of the notebook's code input
# cells (div.input). The script calls code_toggle once when the document is
# ready, so the inputs start hidden; every click on the button flips
# code_show and shows or hides them again.
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
import itertools
import matplotlib.pyplot as plt
from bson.objectid import ObjectId
from sgmtradingcore.analytics.metrics import flat_capital_metrics
from stratagemdataprocessing.dbutils.mongo import MongoPersister
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (16, 8)
db = MongoPersister.init_from_config('backtesting', auto_connect=True)
name = 'coint'
desc = 'cochrane_orcutt'
code = 'ft12_fttp.nba'
mnemonic = 'ts.mmp_entryexit_barebones'
trading_id = '562f5bef497aee1c22000001'
config_id = '5a02ec269316de0f57457af3'
print 'Strategy name:', name
print 'Strategy desc:', desc
print 'Strategy code:', code
print 'Mnemonic: ', mnemonic
from pprint import pprint
pprint(db['strategy_configurations'].find_one({'_id': ObjectId(config_id)}))
def to_dataframe(orders):
    """Flatten raw order documents into a DataFrame of settled orders.

    Only orders whose ``status_str`` equals ``'SETTLED'`` are kept. The
    columns ``placed_time``, ``size``, ``price`` and ``date_day`` are renamed
    to ``dt``, ``stake``, ``odds`` and ``date``. The nested ``details``
    document of every order is unpacked into the columns ``reason``,
    ``pair_key`` (as a tuple), ``portfolio_id``, ``plausible_returns``
    (empty list when absent) and ``weight``, and is then dropped.

    :param orders: iterable of order dicts.
    :return: DataFrame with one row per settled order.
    """
    def _weight(details):
        # Only 'OpenTradeSingleSticker' intentions carry a weight: the value
        # of the first signal for the sticker named by the intention.
        intention = details['trade_intention']
        if intention['name'] != 'OpenTradeSingleSticker':
            return None
        return details['signals'][0]['value'][intention['sticker']]

    settled = [order for order in orders if order['status_str'] == 'SETTLED']

    source_columns = ['placed_time', 'pnl', 'size', 'bet_side', 'price',
                      'date_day', 'event_id', 'sticker', 'details']
    renamed = {'placed_time': 'dt', 'size': 'stake', 'price': 'odds', 'date_day': 'date'}
    df = pd.DataFrame(settled, columns=source_columns).rename(columns=renamed)

    # Take the nested documents out of the frame up front; the derived
    # columns below are all computed from this series.
    details = df.pop('details')

    df['is_back'] = df['bet_side'] == 'back'
    df['capital'] = 10000
    df['reason'] = details.apply(lambda d: d.get('reason'))
    df['pair_key'] = details.apply(lambda d: tuple(d['pair_key']))
    df['portfolio_id'] = details.apply(lambda d: d['portfolio_id'])
    df['plausible_returns'] = details.apply(lambda d: d.get('plausible_returns', []))
    df['weight'] = details.apply(_weight)
    return df
# Every stored result document for the strategy/config selected above.
result_query = {
    'strategy_name': name,
    'strategy_desc': desc,
    'strategy_code': code,
    # 'trading_user_id': trading_id,
    'mnemonic': mnemonic,
    'config_id': config_id,
}
rows = list(db['strategy_results'].find(result_query))

# One frame of settled orders per result that placed any orders, stacked
# into the single frame `df` that the rest of the notebook works on.
order_frames = []
for r in rows:
    if r['n_orders'] > 0:
        result_orders = db['orders'].find({'strategy_result_id': str(r['_id'])})
        order_frames.append(to_dataframe(list(result_orders)))
df = pd.concat(order_frames)
# Tag every order with the calendar month of its placement time and show the
# flat-capital metrics per month (transposed: one column per month).
df['month'] = df.dt.dt.month
flat_capital_metrics(df, groupby='month').T

# Histogram of the total PnL per trading date.
flat_capital_metrics(df, groupby='date')['total_pnl'].hist()
plt.title('Distribution of total daily PnL.')
plt.xlabel('PnL [GBp]')
# hist() is called without normalisation, so the bars are plain counts; the
# axis is labelled as dimensionless, like the other histograms in this file.
_ = plt.ylabel('Frequency [-]')

# Histogram of the cumulative return per event.
flat_capital_metrics(df, groupby='event_id')['cum_return'].hist()
plt.title('Distribution of returns per event.')
plt.xlabel('Return [%]')
_ = plt.ylabel('Frequency [-]')
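Let a portfolio, $\Pi_i$, be the set of trades associated with a given signal to take a position on $n > 0$ markets. Each portfolio typically comprises $2n$ trades (for each market, one trade to open the position and one to close it), though a single executed trade is enough for it to be valid. For each portfolio we define the following properties: its PnL (the sum of the PnLs of its trades), its age (the time in seconds between its first and its last trade), and its closing reason (the reason recorded on its last trade).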
# Summarise every portfolio into one row of `pdf`, indexed by portfolio_id:
#   pnl    - sum of the PnL of all its orders
#   age    - seconds between its first and its last order (ordered by `dt`)
#   reason - the `reason` recorded on its last order
pdf = pd.DataFrame(columns=['portfolio_id', 'pnl', 'age', 'reason']).set_index('portfolio_id')
gps = df.groupby('portfolio_id')
for g in gps.groups:
    gp = gps.get_group(g).sort_values('dt')
    first, last = gp.iloc[0], gp.iloc[-1]
    pdf.loc[g] = [gp.pnl.sum(), (last['dt'] - first['dt']).total_seconds(), last.reason]

# Builtin int instead of the np.int alias: the resulting dtype is identical,
# but np.int was deprecated and later removed from NumPy.
pdf['age'] = pdf['age'].astype(int)

# Portfolios whose last order carries no reason are labelled 'unclosed'.
pdf.loc[pd.isnull(pdf.reason), 'reason'] = 'unclosed'
pdf.describe().T
print 'Minimum PnL:'
pdf[pdf.pnl == pdf.pnl.min()]
print 'Maxmimum PnL:'
pdf[pdf.pnl == pdf.pnl.max()]
pnl_out_lim = pdf.pnl.quantile(0.005)
axes = pdf[pdf.pnl > pnl_out_lim][['pnl', 'age']].hist()[0]
plt.suptitle('Distribution of portfolio age and PnL (exclusing extremes).')
axes[0].set_xlabel('Age [s]')
axes[0].set_ylabel('Frequency [-]')
axes[1].set_xlabel('PnL [GBp]')
_ = axes[1].set_ylabel('Frequency [-]')
sns.distplot(pdf[pdf.pnl > pnl_out_lim][pdf.age < pdf.age.quantile(0.25)]['pnl'])
sns.distplot(pdf[pdf.pnl > pnl_out_lim][pdf.age > pdf.age.quantile(0.75)]['pnl'])
plt.legend(['Age < q25', 'Age > q75'])
plt.title('Distribution of portfolio PnLs for the upper and lower quantiles on age (exclusing extremes).')
plt.xlabel('PnL [GBp]')
_ = plt.ylabel('Frequency [-]')
# Summary statistics of the PnL of individual orders, split by their reason.
df.groupby('reason')['pnl'].describe().T

# Overlaid PnL histograms of the non-extreme portfolios, one per reason.
# notnull() states the "has a reason" test directly, and both conditions are
# combined into a single mask over `pdf`.
f, ax = plt.subplots(1)
has_reason = pdf.reason.notnull()
gps = pdf[has_reason & (pdf.pnl > pnl_out_lim)].groupby('reason')
for gid, gp in gps:
    gp.pnl.hist(alpha=0.6, label=gid)
_ = plt.legend()

# Portfolio PnL against age, coloured by reason, without a regression fit.
# x and y are passed by keyword so the call does not depend on lmplot's
# positional parameter order.
sns.lmplot(x="age", y="pnl", data=pdf, hue="reason", fit_reg=False, aspect=1.8)
plt.title('Distribution of portfolio PnLs as a function of age.')
plt.xlabel('Age [s]')
_ = plt.ylabel('PnL [GBp]')
def get_ret(gp, f=lambda vs: vs[-1]):
    """Reduce a group's longest ``plausible_returns`` list to a single value.

    The rows of ``gp`` are ordered by ``dt``; among their
    ``plausible_returns`` lists the longest one is selected (the earliest on
    ties) and passed to ``f``.

    :param gp: DataFrame with the columns ``dt`` and ``plausible_returns``.
    :param f: reducer applied to the selected list; by default it returns the
        last element.
    :return: ``f`` of the longest list, or ``0.0`` when the group has no rows
        or every list is empty.
    """
    prs = gp.sort_values('dt')['plausible_returns'].values
    # A real list, not map(): the lengths are read twice (emptiness check and
    # argmax), which a one-shot iterator would not survive.
    lens = [len(pr) for pr in prs]
    # Guard first, so np.argmax is never called on an empty sequence.
    if not any(lens):
        return 0.0
    return f(prs[np.argmax(lens)])


def get_best_ret(gp):
    """Return the maximum of the group's longest ``plausible_returns`` list (0.0 if none)."""
    return get_ret(gp, max)


def get_worst_ret(gp):
    """Return the minimum of the group's longest ``plausible_returns`` list (0.0 if none)."""
    return get_ret(gp, min)
# Per-portfolio returns derived from the recorded `plausible_returns`:
# get_ret takes the last value, get_best_ret the maximum and get_worst_ret
# the minimum of the longest list found among the portfolio's orders
# (0.0 when all lists are empty).
gps = df.groupby('portfolio_id')
actual_rets = gps.apply(get_ret)
best_rets = gps.apply(get_best_ret)
worst_rets = gps.apply(get_worst_ret)
# Scatter: realised return against the spread between best and worst return.
plt.scatter(actual_rets, best_rets-worst_rets)
plt.xlabel('$r_{actual}$ [-]')
plt.ylabel('$r_{best} - r_{worst}$ [-]')
_ = plt.title('Relationship between realised return and the range of possible returns during the portfolios life.', y=1.02)
# Scatter: best against worst return of each portfolio.
plt.scatter(best_rets, worst_rets)
plt.xlabel('$r_{best}$ [-]')
plt.ylabel('$r_{worst}$ [-]')
_ = plt.title('Relationship between best and worst returns that could have been realised.', y=1.02)
# Three semi-transparent histograms of the pairwise differences; the legend
# entries below are listed in the same order as these hist() calls.
(best_rets-actual_rets).hist(alpha=0.6)
(best_rets-worst_rets).hist(alpha=0.6)
(actual_rets-worst_rets).hist(alpha=0.6)
plt.xlabel('$r$ [-]')
plt.ylabel('Frequency [-]')
plt.title('Distribution of various differences between best/actual/worst possible returns.', y=1.02)
_ = plt.legend(['$r_{best} - r_{actual}$', '$r_{best} - r_{worst}$', '$r_{actual} - r_{worst}$'], loc='best', prop={'size': 20})
def load_df(s):
    """Load the cleaned historical odds of sticker ``s``, indexed by timestamp.

    Fetches ``'<s>.BF'`` from a ``HistoricalOddsCache(parse=False)`` and
    passes the result through ``clean_dataframe(..., min_matched=0)``. When a
    frame comes back, its ``timestamp`` column is converted from milliseconds
    to datetimes and becomes the index.

    :param s: sticker to load.
    :return: the timestamp-indexed frame, or None when ``clean_dataframe``
        returns None.
    """
    from sgmarb.backtesting.data import clean_dataframe
    from stratagemdataprocessing.bookmakers.common.odds.cache import HistoricalOddsCache

    cache = HistoricalOddsCache(parse=False)
    raw_odds = cache.get('%s.BF' % s)
    sdf = clean_dataframe(raw_odds, min_matched=0)
    if sdf is None:
        return None

    sdf['timestamp'] = pd.to_datetime(sdf.timestamp, unit='ms')
    return sdf.set_index('timestamp')
def do_plot(pk):
    """Plot price history and position windows for the stickers of pair key ``pk``.

    One subplot is drawn per sticker traded under ``pk`` for which
    ``load_df`` returns data. Each subplot shows the mean of the ``bp1`` and
    ``lp1`` columns as a step line and shades, for every portfolio, the span
    between its first and last order on that sticker: red when the first
    order is a back bet, blue otherwise.

    Nothing is drawn when no sticker has loadable data.

    :param pk: pair key (as stored in ``df.pair_key``) to plot.
    :return: None.
    """
    edf = df[df.pair_key == pk].sort_values('dt')
    stickers = edf['sticker'].unique()
    loaded = {s: load_df(s) for s in stickers}
    sdfs = {s: sdf for s, sdf in loaded.items() if sdf is not None}
    if not sdfs:
        # plt.subplots() cannot build a grid with zero rows.
        return

    # squeeze=False keeps `axes` a 2-D array even for a single sticker, where
    # subplots() would otherwise return one bare Axes that cannot be indexed.
    f, axes = plt.subplots(len(sdfs), sharex=True, squeeze=False,
                           figsize=(16, 4 * len(sdfs)))
    plt.suptitle(pk)
    for ax, (s, sdf) in zip(axes[:, 0], sdfs.items()):
        # NOTE(review): bp1/lp1 are presumably the best back and lay prices,
        # making this the mid price - confirm against clean_dataframe.
        mp = (sdf['bp1'] + sdf['lp1']) / 2.0
        ax.plot(sdf.index, mp, drawstyle='steps-post', color='k', alpha=0.5)
        ax.set_title(s)
        # Cap the y-range at 10 so the lower part of the price range stays readable.
        ax.set_ylim([1.0, min(10.0, mp.max() * 1.1)])

        for pid, p in edf[edf.sticker == s].groupby('portfolio_id'):
            open_dt = p['dt'].min()
            close_dt = p['dt'].max()
            # The side of the portfolio's first order decides the colour.
            if p[p.dt == open_dt].iloc[0].is_back:
                color, label = 'red', 'Back'
            else:
                color, label = 'blue', 'Lay'
            ax.axvspan(open_dt, close_dt, color=color, alpha=0.1, label=label)
# Draw the price/position chart for at most the first 50 distinct pair keys.
pair_keys = df.pair_key.unique()[:50]
for pk in pair_keys:
    do_plot(pk)